Theano - Our first neural network

In this case we will work with the same data used in the Logistic Regression example, but this time using a multi-layer perceptron.

We load the data in the same way as before:


In [1]:
import numpy
import theano
import theano.tensor as T
import scipy.io as io
        
print '... loading data'
data=io.loadmat('dataLR.mat',squeeze_me=True)
dataIn=data['data'][:,0:2].astype(theano.config.floatX)
dataOut = data['data'][:,2].astype(int)


... loading data
Using gpu device 0: GeForce GTX 775M

The CapaOculta class


In [2]:
class CapaOculta(object):
    def __init__(self, rng, input, n_in, n_out, W=None, b=None,
                 activation=T.tanh):
        """
        Capa oculta típica de un MLP: las neuronas están todas conectadas y tienen una función de activación simoidea.
        La matriz de pesos "W" es de la forma (n_in,n_out)
        y el vector bias "b" (n_out,).

        Nota : Usamos TANH

        La función de activación viene dada por: tanh(dot(input,W) + b)

        :type rng: numpy.random.RandomState
        :param rng: Generador de número aleatorios para inicializar los pesos

        :type input: theano.tensor.dmatrix
        :param input: Un tensor simbólico para definir los datos de entrada (n_examples, n_in)

        :type n_in: int
        :param n_in: dimensionalidad de la entrada

        :type n_out: int
        :param n_out: número de neuronas ocultas

        :type activation: theano.Op or function
        :param activation: Función usada en la capa oculta
        """
        self.input = input
       
        if W is None:
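            # draw the initial weights uniformly from
            # [-sqrt(6 / (n_in + n_out)), +sqrt(6 / (n_in + n_out))], the range
            # suggested for tanh units; sigmoid units use 4 times this range (below)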
            W_values = numpy.asarray(
                rng.uniform(
                    low=-numpy.sqrt(6. / (n_in + n_out)),
                    high=numpy.sqrt(6. / (n_in + n_out)),
                    size=(n_in, n_out)
                ),
                dtype=theano.config.floatX
            )
            if activation == T.nnet.sigmoid:
                W_values *= 4

            W = theano.shared(value=W_values, name='W', borrow=True)

        if b is None:
            b_values = numpy.zeros((n_out,), dtype=theano.config.floatX)
            b = theano.shared(value=b_values, name='b', borrow=True)

        self.W = W
        self.b = b

        lin_output = T.dot(input, self.W) + self.b
        self.output = (
            lin_output if activation is None
            else activation(lin_output)
        )
        # parameters of the model
        self.params = [self.W, self.b]
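
As a quick sanity check (this cell is a minimal sketch added for illustration; the names `layer`, `f` and `sample` are ours, not part of the original code), we can instantiate CapaOculta on its own and verify that its output really is tanh(dot(input, W) + b):


In [ ]:
rng_test = numpy.random.RandomState(0)
x_test = T.matrix('x_test')
layer = CapaOculta(rng=rng_test, input=x_test, n_in=2, n_out=3)

# compile a function that evaluates the layer's symbolic output
f = theano.function([x_test], layer.output)

sample = numpy.asarray([[0.5, -1.0]], dtype=theano.config.floatX)
out = f(sample)
print out.shape  # (1, 3): one example, three hidden units

# repeat the computation by hand with the layer's shared parameters
W_val = layer.W.get_value()
b_val = layer.b.get_value()
print numpy.allclose(out, numpy.tanh(numpy.dot(sample, W_val) + b_val))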

The LogisticRegression class


In [3]:
class LogisticRegression(object):
    """Multi-class Logistic Regression Class

    The logistic regression is fully described by a weight matrix :math:`W`
    and bias vector :math:`b`. Classification is done by projecting data
    points onto a set of hyperplanes, the distance to which is used to
    determine a class membership probability.
    """

    def __init__(self, input, n_in, n_out):
        """ Initialize the parameters of the logistic regression

        :type input: theano.tensor.TensorType
        :param input: symbolic variable that describes the input of the
                      architecture (one minibatch)

        :type n_in: int
        :param n_in: number of input units, the dimension of the space in
                     which the datapoints lie

        :type n_out: int
        :param n_out: number of output units, the dimension of the space in
                      which the labels lie

        """
        # initialize the weights W with zeros, as a matrix of shape (n_in, n_out)
        self.W = theano.shared(
            value=numpy.zeros(
                (n_in, n_out),
                dtype=theano.config.floatX
            ),
            name='W',
            borrow=True
        )
        # initialize the biases b as a vector of n_out zeros
        self.b = theano.shared(
            value=numpy.zeros(
                (n_out,),
                dtype=theano.config.floatX
            ),
            name='b',
            borrow=True
        )

        # symbolic expression for computing the matrix of class-membership
        # probabilities
        # Where:
        # W is a matrix where column k represents the separating hyperplane for
        # class k
        # x is a matrix where row j represents input training sample j
        # b is a vector where element k represents the free parameter of
        # hyperplane k
        self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W) + self.b)

        # symbolic description of how to compute prediction as class whose
        # probability is maximal
        self.y_pred = T.argmax(self.p_y_given_x, axis=1)

        # parameters of the model
        self.params = [self.W, self.b]

    def negative_log_likelihood(self, y):
        r"""Return the mean of the negative log-likelihood of the prediction
        of this model under a given target distribution.

        .. math::

            \frac{1}{|\mathcal{D}|} \mathcal{L} (\theta=\{W,b\}, \mathcal{D}) =
            \frac{1}{|\mathcal{D}|} \sum_{i=0}^{|\mathcal{D}|}
                \log(P(Y=y^{(i)}|x^{(i)}, W, b))

        :type y: theano.tensor.TensorType
        :param y: corresponds to a vector that gives for each example the
                  correct label

        Note: we use the mean instead of the sum so that
              the learning rate is less dependent on the batch size
        """
        # y.shape[0] is (symbolically) the number of rows in y, i.e.,
        # number of examples (call it n) in the minibatch
        # T.arange(y.shape[0]) is a symbolic vector which will contain
        # [0,1,2,... n-1] T.log(self.p_y_given_x) is a matrix of
        # Log-Probabilities (call it LP) with one row per example and
        # one column per class LP[T.arange(y.shape[0]),y] is a vector
        # v containing [LP[0,y[0]], LP[1,y[1]], LP[2,y[2]], ...,
        # LP[n-1,y[n-1]]] and T.mean(LP[T.arange(y.shape[0]),y]) is
        # the mean (across minibatch examples) of the elements in v,
        # i.e., the mean log-likelihood across the minibatch.
        return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]), y])

    def errors(self, y):
        """Return a float representing the number of errors in the minibatch
        over the total number of examples of the minibatch ; zero one
        loss over the size of the minibatch

        :type y: theano.tensor.TensorType
        :param y: corresponds to a vector that gives for each example the
                  correct label
        """

        # check if y has same dimension of y_pred
        if y.ndim != self.y_pred.ndim:
            raise TypeError(
                'y should have the same shape as self.y_pred',
                ('y', y.type, 'y_pred', self.y_pred.type)
            )
        # check if y is of the correct datatype
        if y.dtype.startswith('int'):
            # the T.neq operator returns a vector of 0s and 1s, where 1
            # represents a mistake in prediction
            return T.mean(T.neq(self.y_pred, y))
        else:
            raise NotImplementedError()
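
The negative_log_likelihood method picks, for each example, the log of the probability assigned to its correct class and averages the result (with a minus sign). Below is a minimal numpy sketch of the same computation, using made-up probabilities rather than anything from the notebook:


In [ ]:
# hypothetical class-membership probabilities for 3 examples and 2 classes
p = numpy.array([[0.9, 0.1],
                 [0.4, 0.6],
                 [0.2, 0.8]])
labels = numpy.array([0, 1, 1])  # correct class of each example

# mean negative log-likelihood: the average of -log(p[i, labels[i]])
nll = -numpy.mean(numpy.log(p[numpy.arange(labels.shape[0]), labels]))
print nll  # approximately 0.28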

The MLP class


In [4]:
class MLP(object):
    """Clase Perceptrón multicapa

    Vamos a definir una sola capa oculta usando la clase CapaOculta que hemos creado anteriormente, y usaremos una capa de
    salida tipo softmax para la que usaremos la clase LogisticRegression.
    """

    def __init__(self, rng, input, n_in, n_hidden, n_out):
        """Initialize the parameters for the multilayer perceptron

        :type rng: numpy.random.RandomState
        :param rng: Generador de número aleatorios para la inicialización de los pesos

        :type input: theano.tensor.TensorType
        :param input: Variable simbólica para la entrada al MLP

        :type n_in: int
        :param n_in: Número de neuronas de entrada

        :type n_hidden: int
        :param n_hidden: Número de neuronas en la capa oculta

        :type n_out: int
        :param n_out: Número de neuronas en la capa de salida

        """

        # Create the hidden layer
        self.hiddenLayer = CapaOculta(
            rng=rng,
            input=input,
            n_in=n_in,
            n_out=n_hidden,
            activation=T.tanh
        )

        # The logistic regression layer takes the output of the hidden layer as its input
        self.logRegressionLayer = LogisticRegression(
            input=self.hiddenLayer.output,
            n_in=n_hidden,
            n_out=n_out
        )
        # L1 norm: one way to regularize the model
        self.L1 = (
            abs(self.hiddenLayer.W).sum()
            + abs(self.logRegressionLayer.W).sum()
        )

        # square of the L2 norm: another way to regularize
        self.L2_sqr = (
            (self.hiddenLayer.W ** 2).sum()
            + (self.logRegressionLayer.W ** 2).sum()
        )

        # the negative log-likelihood of the MLP is that of its logistic
        # regression output layer
        self.negative_log_likelihood = (
            self.logRegressionLayer.negative_log_likelihood
        )
        # likewise, the errors of the MLP are those of the output layer
        self.errors = self.logRegressionLayer.errors

        # the parameters of the model are the parameters of the two layers
        self.params = self.hiddenLayer.params + self.logRegressionLayer.params
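
The L1 and L2_sqr attributes are simply the sum of absolute values and the sum of squares of all weight entries of both layers. A toy example with a made-up 2x2 weight matrix shows what each term measures:


In [ ]:
W_toy = numpy.array([[1.0, -2.0],
                     [0.5,  0.0]])

l1 = numpy.abs(W_toy).sum()   # 1 + 2 + 0.5 + 0 = 3.5
l2_sqr = (W_toy ** 2).sum()   # 1 + 4 + 0.25 + 0 = 5.25
print l1, l2_sqr

# in the cost used below, these terms are weighted by L1_reg and L2_reg and
# added to the negative log-likelihood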

Training and testing the model


In [ ]:
learning_rate=0.1
L1_reg=0.00
L2_reg=0.0001
n_epochs=10000
batch_size=20
n_hidden=10

train_set_x = theano.shared(dataIn)
train_set_y = theano.shared(dataOut)
n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    
print '... building the model'
    
index = T.iscalar()  # minibatch index
x = T.matrix('x')  # input data
y = T.lvector('y')  # expected output labels

rng = numpy.random.RandomState(1234)

# Build the MLP object
classifier = MLP(
    rng=rng,
    input=x,
    n_in=2,
    n_hidden=n_hidden,
    n_out=2  # two classes (labels 0 and 1): the softmax needs one output unit per class
)
    
# Cost function to minimize
cost = (
        classifier.negative_log_likelihood(y)
        + L1_reg * classifier.L1
        + L2_reg * classifier.L2_sqr
)
    
# Compute the gradient of the cost with respect to the parameters of both layers
gparams = [T.grad(cost, param) for param in classifier.params]

# Define the gradient descent updates of the parameters
updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(classifier.params, gparams)
]

print train_set_x.dtype
print train_set_y.dtype
print index.dtype
# Compile the training function
train_model = theano.function(
    inputs=[index],
    outputs=cost,
    updates=updates,
    givens={
        x: train_set_x[index * batch_size: (index + 1) * batch_size],
        y: train_set_y[index * batch_size: (index + 1) * batch_size]
    }
)
    
print '... training'
epoch = 0
while epoch < n_epochs:
    epoch = epoch + 1
    minibatch_avg_cost = 0
    for minibatch_index in xrange(n_train_batches):
        minibatch_avg_cost = minibatch_avg_cost + train_model(minibatch_index)
    print "Epoch: " + repr(epoch) + " - Mean cost: " + repr(minibatch_avg_cost / n_train_batches)

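# compile a function that returns the predicted class of each example in minibatch `index`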
predict = theano.function(
    inputs=[index],
    outputs=classifier.logRegressionLayer.y_pred,
    givens={
        x: train_set_x[index * batch_size: (index + 1) * batch_size]
    }
)

test = [predict(i) for i
        in xrange(n_train_batches)]
print test
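
The cell above only prints the raw predictions of each minibatch. As an optional check (a small sketch added here; it assumes we simply want the training accuracy over the examples covered by the full batches), we can compare those predictions with dataOut:


In [ ]:
predictions = numpy.concatenate(test)
targets = dataOut[:predictions.shape[0]]  # labels of the examples covered above
print 'Training accuracy: ' + repr(float(numpy.mean(predictions == targets)))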

Exercise

Use the "digits.mat" dataset and configure our MLP to work with it.
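
As a starting point (a sketch only: the variable names stored inside "digits.mat" are not shown in this notebook, so we just list them), you can inspect the file with scipy.io and then choose n_in, n_hidden and n_out to match the shapes you find:


In [ ]:
digits = io.loadmat('digits.mat', squeeze_me=True)

# list the variables stored in the file and their shapes, so that dataIn and
# dataOut can be built and n_in / n_out chosen accordingly
for key in digits:
    if not key.startswith('__'):
        value = digits[key]
        print key, getattr(value, 'shape', value)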


In [ ]: